import os

import requests
from bs4 import BeautifulSoup

"""
requests模块的编码流程
1.发送URL
2.发起请求
3.获取响应数据
4.持久化存储
"""
# 创建文件夹
# filename = './sanguo'
# if not os.path.exists(filename):
#     os.mkdir(filename)
# Browser-style User-Agent header so the site serves the normal page
# instead of rejecting the scraper.
header = {
    'User-Agent': 'Mozilla/5.0(Windows NT 10.0;WOW64)AppleWebKit/537.36(KHTML, like Gecko) Chrome / 86.0.4240.198 Safari / 537.36',
}

def main(limit=50):
    """Scrape the first *limit* chapters of 三国演义 (Romance of the Three
    Kingdoms) from shicimingju.com and save each chapter as a UTF-8 text
    file under ./sanguo/.

    Args:
        limit: number of chapters to download from the book index
            (default 50, matching the original behavior).
    """
    # Ensure the output directory exists; the original relied on it being
    # created by hand (the os.mkdir code above is commented out).
    os.makedirs('./sanguo', exist_ok=True)

    index_url = 'https://www.shicimingju.com/book/sanguoyanyi.html'

    # Fetch the table-of-contents page; the site serves UTF-8 but requests
    # may guess the wrong encoding, so set it explicitly.
    index_resp = requests.get(url=index_url, headers=header)
    index_resp.encoding = 'utf-8'
    index_soup = BeautifulSoup(index_resp.text, 'lxml')
    # First `limit` chapter links in the book index.
    chapter_links = index_soup.find('div', class_='book-mulu').find_all('a')[:limit]

    base_url = 'https://www.shicimingju.com'
    for link in chapter_links:
        chapter_url = base_url + link['href']
        title = str(link.string)
        # Sanitize the chapter title for use as a file name.
        file_name = title.replace(' ', '_').replace('·', '_')

        # Fetch the chapter page and extract its text content.
        chapter_resp = requests.get(url=chapter_url, headers=header)
        chapter_resp.encoding = 'utf-8'
        chapter_soup = BeautifulSoup(chapter_resp.text, 'lxml')
        content = chapter_soup.find('div', class_='chapter_content').text.strip()

        # Persist to disk. `with` guarantees the handle is closed — the
        # original opened without closing (handle leak) and rebuilt the
        # sanitized name instead of reusing file_name.
        with open('./sanguo/' + file_name + '.txt', 'w', encoding='utf-8') as fp:
            fp.write(content)
        print(file_name)
        print(content)

    print("保存成功")


if __name__ == "__main__":
    main()
